{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using TensorFlow backend.\n"
     ]
    }
   ],
   "source": [
    "import numpy\n",
    "from keras.datasets import imdb\n",
    "from keras.models import Sequential\n",
    "from keras.layers import Dense\n",
    "from keras.layers import LSTM\n",
    "from keras.layers.embeddings import Embedding\n",
    "from keras.preprocessing import sequence\n",
    "# fix random seed for reproducibility\n",
    "numpy.random.seed(7)\n",
    "\n",
    "# load the dataset but only keep the top n words, zero the rest\n",
    "top_words = 5000\n",
    "(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=top_words)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "218\n",
      "189\n",
      "1\n"
     ]
    }
   ],
   "source": [
    "print(len(X_train[0]))\n",
    "print(len(X_train[1]))\n",
    "print(y_train[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "max_review_length = 500\n",
    "X_train = sequence.pad_sequences(X_train, maxlen=max_review_length)\n",
    "X_test = sequence.pad_sequences(X_test, maxlen=max_review_length)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "_________________________________________________________________\n",
      "Layer (type)                 Output Shape              Param #   \n",
      "=================================================================\n",
      "embedding_1 (Embedding)      (None, 500, 32)           160000    \n",
      "_________________________________________________________________\n",
      "lstm_1 (LSTM)                (None, 100)               53200     \n",
      "_________________________________________________________________\n",
      "dense_1 (Dense)              (None, 1)                 101       \n",
      "=================================================================\n",
      "Total params: 213,301\n",
      "Trainable params: 213,301\n",
      "Non-trainable params: 0\n",
      "_________________________________________________________________\n",
      "None\n",
      "Train on 25000 samples, validate on 25000 samples\n",
      "Epoch 1/3\n",
      "25000/25000 [==============================] - 1009s - loss: 0.5405 - acc: 0.7107 - val_loss: 0.5959 - val_acc: 0.6607\n",
      "Epoch 2/3\n",
      "25000/25000 [==============================] - 970s - loss: 0.3650 - acc: 0.8405 - val_loss: 0.3068 - val_acc: 0.8734\n",
      "Epoch 3/3\n",
      "25000/25000 [==============================] - 953s - loss: 0.2468 - acc: 0.9019 - val_loss: 0.2893 - val_acc: 0.8806\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<keras.callbacks.History at 0x11cb8dd90>"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# create the model\n",
    "embedding_vecor_length = 32\n",
    "model = Sequential()\n",
    "model.add(Embedding(top_words, embedding_vecor_length, input_length=max_review_length))\n",
    "model.add(LSTM(100))\n",
    "model.add(Dense(1, activation='sigmoid'))\n",
    "model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])\n",
    "print(model.summary())\n",
    "model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=3, batch_size=64)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy: 88.06%\n"
     ]
    }
   ],
   "source": [
    "# Final evaluation of the model\n",
    "scores = model.evaluate(X_test, y_test, verbose=0)\n",
    "print(\"Accuracy: %.2f%%\" % (scores[1]*100))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
